---
title: "NYC Air Quality Dashboard"
output:
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: fill
    source: embed
runtime: shiny
---
```{r dashboard setup, include = FALSE}
# Libraries
library(flexdashboard)
library(leaflet)
library(tidyverse)
library(httr)
library(data.table)
# Theme
# Minimal ggplot2 theme with the legend below the plot; viridis is made the
# default palette for continuous colour/fill scales.
theme_set(
  theme_minimal() +
    theme(legend.position = "bottom")
)
options(ggplot2.continuous.colour = "viridis")
options(ggplot2.continuous.fill = "viridis")
# Rebind the default discrete scale names to their viridis counterparts.
scale_colour_discrete <- scale_colour_viridis_d
scale_fill_discrete <- scale_fill_viridis_d
```
```{r air quality data cleaning, include = FALSE}
# Data import and cleaning
# Read the WAQI 2020 extract, keep the US rows for the five NYC boroughs,
# copy City into `borough`, and spread the per-species statistics into one
# column per (statistic, species) pair with snake_case names.
air_quality <-
  fread("./data/waqi-covid19-airqualitydata-2020.csv") %>%
  filter(
    Country == "US",
    City %in% c("Brooklyn", "Queens", "The Bronx", "Staten Island", "Manhattan")
  ) %>%
  mutate(borough = City) %>%
  select(-City, -Country) %>%
  pivot_wider(
    names_from = "Specie",
    values_from = c("count", "min", "max", "median", "variance")
  ) %>%
  janitor::clean_names()
# Save the NYC-only table next to the raw data.
fwrite(air_quality, "./data/Just_NYC_Air_Quality_Data.csv")
# Adding air quality index
# Keep the row keys plus the four pollutant medians that feed the AQI.
AQI_temp <- select(
  air_quality,
  date, borough, median_pm25, median_o3, median_co, median_no2
)
# Convert ozone readings to an AQI sub-index.
#
# Vectorised: `AQI` may be a single number or a whole column. Each reading is
# mapped with the standard piecewise-linear AQI equation
#   I = (I_hi - I_lo) / (C_hi - C_lo) * (C - C_lo) + I_lo
# using the band whose lower breakpoint is the largest one <= the reading.
#
# The previous version used a scalar `if`, so a column was classified by its
# first element only (an error on R >= 4.3), and anything above 54 returned
# NULL. Values in [0, 54] give the same result as before.
#
# @param AQI Numeric vector of ozone readings.
#   NOTE(review): the breakpoints below are the EPA 8-hour ozone table in ppb;
#   confirm the input column is on that scale.
# @return Numeric vector, same length as `AQI`. NA for NA or negative input;
#   readings above the last breakpoint are extrapolated along the last band.
AQI_formula_O3 = function(AQI){
  conc = as.numeric(AQI)
  # Breakpoint table, one entry per AQI band.
  conc_lo  = c(0, 55, 71, 86)
  conc_hi  = c(54, 70, 85, 105)
  index_lo = c(0, 51, 101, 151)
  index_hi = c(50, 100, 150, 200)
  # Band number per reading: 0 = below the first breakpoint, NA = missing input.
  band = findInterval(conc, conc_lo)
  band[which(band == 0)] = NA
  slope = (index_hi - index_lo) / (conc_hi - conc_lo)
  slope[band] * (conc - conc_lo[band]) + index_lo[band]
}
# Convert PM2.5 readings to an AQI sub-index.
#
# Vectorised: `AQI` may be a single number or a whole column. Each reading is
# mapped with the standard piecewise-linear AQI equation
#   I = (I_hi - I_lo) / (C_hi - C_lo) * (C - C_lo) + I_lo
# using the band whose lower breakpoint is the largest one <= the reading.
#
# Fixes over the previous scalar `if` chain: a column is no longer classified
# by its first element only; negative readings and readings that fell between
# bands (e.g. 12.05) no longer drop into the top-band formula; and the upper
# bands use I_hi - I_lo (49) so a band's top maps to 100/150/200 rather than
# overshooting into the next category.
#
# @param AQI Numeric vector of PM2.5 readings.
#   NOTE(review): breakpoints are expressed in ug/m3 in the EPA table; confirm
#   the input column is on that scale.
# @return Numeric vector, same length as `AQI`. NA for NA or negative input;
#   readings above the last breakpoint are extrapolated along the last band.
AQI_formula_pm25 = function(AQI){
  conc = as.numeric(AQI)
  # Breakpoint table, one entry per AQI band.
  conc_lo  = c(0, 12.1, 35.5, 55.5)
  conc_hi  = c(12, 35.4, 55.4, 150.4)
  index_lo = c(0, 51, 101, 151)
  index_hi = c(50, 100, 150, 200)
  # Band number per reading: 0 = below the first breakpoint, NA = missing input.
  band = findInterval(conc, conc_lo)
  band[which(band == 0)] = NA
  slope = (index_hi - index_lo) / (conc_hi - conc_lo)
  slope[band] * (conc - conc_lo[band]) + index_lo[band]
}
# Convert carbon monoxide readings to an AQI sub-index.
#
# Vectorised: `AQI` may be a single number or a whole column. Each reading is
# mapped with the standard piecewise-linear AQI equation
#   I = (I_hi - I_lo) / (C_hi - C_lo) * (C - C_lo) + I_lo
# using the band whose lower breakpoint is the largest one <= the reading.
#
# Fixes over the previous scalar `if` chain: a column is no longer classified
# by its first element only; the third band now measures from its lower
# breakpoint (9.5) instead of its upper one (12.4), which used to yield values
# below 101 for that band; and the upper bands use I_hi - I_lo (49).
#
# @param AQI Numeric vector of CO readings.
#   NOTE(review): breakpoints are expressed in ppm in the EPA table; confirm
#   the input column is on that scale.
# @return Numeric vector, same length as `AQI`. NA for NA or negative input;
#   readings above the last breakpoint are extrapolated along the last band.
AQI_formula_co = function(AQI){
  conc = as.numeric(AQI)
  # Breakpoint table, one entry per AQI band.
  conc_lo  = c(0, 4.5, 9.5)
  conc_hi  = c(4.4, 9.4, 12.4)
  index_lo = c(0, 51, 101)
  index_hi = c(50, 100, 150)
  # Band number per reading: 0 = below the first breakpoint, NA = missing input.
  band = findInterval(conc, conc_lo)
  band[which(band == 0)] = NA
  slope = (index_hi - index_lo) / (conc_hi - conc_lo)
  slope[band] * (conc - conc_lo[band]) + index_lo[band]
}
# Convert nitrogen dioxide readings to an AQI sub-index.
#
# Vectorised: `AQI` may be a single number or a whole column. Each reading is
# mapped with the standard piecewise-linear AQI equation
#   I = (I_hi - I_lo) / (C_hi - C_lo) * (C - C_lo) + I_lo
# using the band whose lower breakpoint is the largest one <= the reading.
#
# Fixes over the previous scalar `if` chain: a column is no longer classified
# by its first element only; negative readings and readings between bands no
# longer drop into the top-band formula; and the upper bands use I_hi - I_lo
# (49) so a band's top maps to 100/150 exactly.
#
# @param AQI Numeric vector of NO2 readings.
#   NOTE(review): breakpoints are expressed in ppb in the EPA table; confirm
#   the input column is on that scale.
# @return Numeric vector, same length as `AQI`. NA for NA or negative input;
#   readings above the last breakpoint are extrapolated along the last band.
AQI_formula_no2 = function(AQI){
  conc = as.numeric(AQI)
  # Breakpoint table, one entry per AQI band.
  conc_lo  = c(0, 54, 101)
  conc_hi  = c(53, 100, 360)
  index_lo = c(0, 51, 101)
  index_hi = c(50, 100, 150)
  # Band number per reading: 0 = below the first breakpoint, NA = missing input.
  band = findInterval(conc, conc_lo)
  band[which(band == 0)] = NA
  slope = (index_hi - index_lo) / (conc_hi - conc_lo)
  slope[band] * (conc - conc_lo[band]) + index_lo[band]
}
# Run each pollutant's formula over its median column. purrr::map() hands the
# whole column to the formula in one call; the result is wrapped back into a
# one-column data frame that keeps the source column name.
AQI_O3 <- data.frame(
  purrr::map(select(AQI_temp, median_o3), AQI_formula_O3)
)
AQI_pm25 <- data.frame(
  purrr::map(select(AQI_temp, median_pm25), AQI_formula_pm25)
)
AQI_co <- data.frame(
  purrr::map(select(AQI_temp, median_co), AQI_formula_co)
)
AQI_no2 <- data.frame(
  purrr::map(select(AQI_temp, median_no2), AQI_formula_no2)
)
# Assemble the per-row AQI table and write it to disk.
#
# Output columns (same names and order as before):
#   date, borough, AQI_Final, AQI_o3, AQI_co, AQI_no2, AQI_pm25, AQI_Category
#
# Changes from the previous version:
# * Columns are named when the frame is built, instead of binding by position
#   and renaming afterwards.
# * AQI_Final is the row-wise maximum via vectorised pmax(); with
#   na.rm = TRUE a row is NA only when every pollutant is missing, rather
#   than whenever any single one is.
# * Missing or negative AQI_Final gets an NA category; negatives used to fall
#   through to "Unhealthy".
AQI_combined = data.frame(
  date = air_quality %>% pull(date),
  borough = air_quality %>% pull(borough),
  AQI_o3 = AQI_O3$median_o3,
  AQI_co = AQI_co$median_co,
  AQI_no2 = AQI_no2$median_no2,
  AQI_pm25 = AQI_pm25$median_pm25,
  stringsAsFactors = FALSE
) %>%
  mutate(
    AQI_Final = pmax(AQI_o3, AQI_co, AQI_no2, AQI_pm25, na.rm = TRUE),
    # Conditions are checked top to bottom, so each upper bound implies the
    # previous band's lower bound.
    AQI_Category = case_when(
      is.na(AQI_Final) | AQI_Final < 0 ~ NA_character_,
      AQI_Final <= 50 ~ "Good",
      AQI_Final <= 100 ~ "Moderate",
      AQI_Final <= 150 ~ "Unhealthy for Sensitive Populations",
      TRUE ~ "Unhealthy"
    )
  ) %>%
  select(
    date, borough, AQI_Final,
    AQI_o3, AQI_co, AQI_no2, AQI_pm25,
    AQI_Category
  )
fwrite(AQI_combined,"./data/Air_Quality_Data_with_AQI.csv")
```
```{r nyc-wide covid data cleaning, include = FALSE}
# City-wide daily COVID case counts from the NYC open-data endpoint.
# Result: one row per date with columns `date` (Date) and `case_count`.
nyc_cases =
  GET("https://data.cityofnewyork.us/resource/rc75-m7u3.csv") %>% # Reading in the data
  # Stop with the HTTP error here instead of trying to parse an error body;
  # on success the response is passed through unchanged.
  stop_for_status() %>%
  content("parsed") %>%
  rename(date = date_of_interest) %>% # Renaming the date variable for simplicity
  select(date, case_count) %>% # Only retaining date and case count for simplicity
  mutate(
    # Coerce to Date; `tryFormats` only applies if the column arrives as text.
    # NOTE(review): content() presumably parses this column as a date-time,
    # in which case only the time-of-day part is dropped -- confirm.
    date = as.Date(date, tryFormats = c("%Y-%m-%d"))
  )
## Note: This dataset also contains hospitalized count and death count.
## I removed those measures for now for simplicity, but we can always decide to retain them later if we think they would be useful.
```
```{r by borough covid data cleaning, cache = TRUE, include = FALSE}
# Per-borough daily COVID data in long form.
#
# Steps: read the CSV, snake_case the names, parse the date, stack the
# borough-prefixed columns into (borough, observation_type, observed_value),
# stack the city-wide columns into (total_observation_type,
# total_observed_value), then keep only the case-count observation types.
nyc_daily_borough_testing =
  read_csv(
    "./data/covid_data/nyc_daily_covid.csv") %>%
  janitor::clean_names() %>%
  rename(date = date_of_interest) %>%
  mutate(
    date = as.Date(date, format = "%m/%d/%Y")
  ) %>%
  pivot_longer(
    bx_case_count:si_death_count_7day_avg,
    names_to = "borough_variable",
    values_to = "observed_value"
  ) %>%
  # Swap each borough prefix for "<Borough>/" so the name can be split on "/".
  mutate(
    borough_variable = str_replace(borough_variable, "bx_", "Bronx/"),
    borough_variable = str_replace(borough_variable, "bk_", "Brooklyn/"),
    borough_variable = str_replace(borough_variable, "qn_", "Queens/"),
    borough_variable = str_replace(borough_variable, "si_", "Staten Island/"),
    borough_variable = str_replace(borough_variable, "mn_", "Manhattan/")
  ) %>%
  separate(borough_variable, into = c("borough", "observation_type"), sep = "/") %>%
  arrange(date, borough, observation_type, observed_value) %>%
  pivot_longer(
    case_count:incomplete,
    names_to = "total_observation_type",
    values_to = "total_observed_value"
  ) %>%
  # Membership test. `==` against a length-2 vector recycles it down the
  # column and compares alternate rows to alternate values, silently dropping
  # matching rows.
  filter(observation_type %in% c("case_count", "case_count_7day_avg"))
# Add proof-of-concept covid policy changes to df
# `policy_change` labels the rows that fall on one of six policy dates and is
# NA elsewhere; `dates_vline` holds the string "TRUE" on those same rows and
# NA elsewhere.
nyc_daily_borough_testing <- mutate(
  nyc_daily_borough_testing,
  policy_change = case_when(
    date == as.Date("2020-03-07") ~ "State of Emergency",
    date == as.Date("2020-03-17") ~ "DeBlasio / Cuomo Conflict on Shutdown",
    date == as.Date("2020-03-22") ~ "PAUSE Order",
    date == as.Date("2020-08-08") ~ "Phase 1 Opening",
    date == as.Date("2020-08-17") ~ "Gyms Starting",
    date == as.Date("2020-10-01") ~ "Primary Schools"
  ),
  # The flagged rows are exactly the ones that received a label above.
  dates_vline = if_else(!is.na(policy_change), "TRUE", NA_character_)
)
```
Column {.sidebar}
-----------------------------------------------------------------------
Here we can write some introductory text/explain our project.
```{r input, eval = FALSE}
# Sidebar date picker bound to input id "date".
# The default and the allowed range were taken from the covid dataset and can
# be updated as needed.
dateInput(
  inputId = "date",
  label = h3("Date input"),
  value = "2020-02-29",
  min = "2020-02-29",
  max = "2020-11-17"
)
```
Column {data-width=500}
-----------------------------------------------------------------------
### Title for Line Graph
```{r example covid line graphs}
# Quick plot of covid cases by borough: daily counts plus a smoothed trend,
# one colour per borough, y axis view limited to 0-2000.
# (The previous group_by() had no effect on the plot and is dropped; the x
# axis is a monthly date scale, so it is labelled "Date" instead of
# "Week Number".)
nyc_daily_borough_testing %>%
  filter(observation_type == "case_count") %>%
  ggplot(aes(x = date, y = observed_value, color = borough)) +
  geom_line() +
  geom_smooth(alpha = 1, se = FALSE) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
  coord_cartesian(ylim = c(0, 2000)) +
  labs(
    title = "Case Count per NYC Borough over Time",
    x = "Date",
    y = "Value",
    caption = "P8105 Final Project")
# Quick plot of the covid 7-day case average, one line per borough, y axis
# view limited to 0-2000.
# (The previous group_by() had no effect on the plot and is dropped; the x
# axis is a monthly date scale, so it is labelled "Date" instead of
# "Week Number".)
nyc_daily_borough_testing %>%
  filter(observation_type == "case_count_7day_avg") %>%
  ggplot(aes(x = date, y = observed_value, color = borough)) +
  geom_line() +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
  coord_cartesian(ylim = c(0, 2000)) +
  labs(
    title = "7 Day Case Count Average per NYC Borough over Time",
    x = "Date",
    y = "Value",
    caption = "P8105 Final Project")
```
```{r example air quality plots}
# Median CO over time, one colour per borough: faint points with a line on
# top, monthly x-axis breaks. (Title spelling corrected: "Emissions".)
air_quality %>%
  ggplot(aes(x = date, y = median_co, color = borough)) +
  geom_point(alpha = .2) +
  geom_line(alpha = .75, size = .75) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
  labs(
    title = "Median CO Emissions over Time for each NYC borough, 2020",
    x = "Month",
    y = "Median CO (Parts per Million)",
    caption = "Examining COVID-19 Incidence, P8105 Final Project")
# Median PM2.5 over time, one colour per borough: faint points with a line on
# top, monthly x-axis breaks. (Axis-label spelling corrected: "micrometers",
# "Million".)
# NOTE(review): the y label gives the unit as parts per million; confirm the
# unit of the source column.
air_quality %>%
  ggplot(aes(x = date, y = median_pm25, color = borough)) +
  geom_point(alpha = .2) +
  geom_line(alpha = .75, size = .75) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
  labs(
    title = "Median Particulate Matter (size<2.5 micrometers) Levels over Time for each NYC borough, 2020",
    x = "Month",
    y = "Median Particulate Matter (size < 2.5 micrometers) Levels (Parts per Million)",
    caption = "Examining COVID-19 Incidence, P8105 Final Project")
# Median temperature over time, one colour per borough: faint points with a
# line on top, monthly x-axis breaks.
ggplot(
  air_quality,
  aes(x = date, y = median_temperature, color = borough)
) +
  geom_point(alpha = 0.2) +
  geom_line(alpha = 0.75, size = 0.75) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %y") +
  labs(
    title = "Median Temperature over Time for each NYC borough, 2020",
    x = "Month",
    y = "Median Temperature (degrees C)",
    caption = "Examining COVID-19 Incidence, P8105 Final Project"
  )
# Overall AQI over time with a smoothed trend, one line per borough.
# ggplot2 ignores dplyr grouping, so the previous group_by(borough) still drew
# every borough as one connected line; mapping borough to colour splits the
# line and the smoother per borough, matching the other plots in this chunk.
AQI_combined %>%
  ggplot(aes(x = date, y = AQI_Final, color = borough)) +
  geom_line() +
  geom_smooth()
```
Column {data-width=500}
-----------------------------------------------------------------------
### Title for Map
```{r, eval = FALSE}
# Scratch code (this chunk is not evaluated): read the borough file and begin
# splitting its `the_geom` column into two parts on ",".
# NOTE(review): the pipe passes the whole data frame as str_remove()'s first
# argument, which pushes `the_geom` into the pattern position -- confirm the
# intended call (and the intended pattern, currently "").
boroughs = read_csv("./data/boroughs.csv") %>%
str_remove(the_geom, "")
# NOTE(review): nothing pipes into the call below, so `the_geom` is taken as
# its data argument -- presumably a `%>%` is missing on the previous line.
separate(the_geom, into = c("name1", "name2"), sep = ",")
# Base map centred on (-74.00, 40.71) at zoom 12, with default tiles plus the
# CartoDB.Positron provider tiles, a two-vertex polygon and a rectangle
# overlay.
# The rectangle call was previously not piped, so it ran on its own without a
# map; the missing `%>%` is added.
leaflet() %>%
  addTiles() %>%
  setView(-74.00, 40.71, zoom = 12) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    lng = c(-73.89680883223774, -73.89693872998792),
    lat = c(40.79580844515979, 40.79563587285357)
  ) %>%
  addRectangles(lat1 = 40.68, lng1 = -74.06, lat2 = 40.80, lng2 = -73.94)
```